library(tidyverse)
path <- "~/R/project/R_tutoring/Git/R_tutoring/Data/kadai01.csv"
df_raw <- read_csv(path, locale = locale(encoding = "UTF-8"))
kadai01 <- df_raw %>%
mutate(pref = c("Hokkaido", "Aomori", "Iwate", "Miyagi", "Akita", "Yamagata",
"Fukushima", "Ibaraki", "Tochigi", "Gunma", "Saitama", "Chiba",
"Tokyo", "Kanagawa", "Niigata", "Toyama", "Ishikawa", "Fukui",
"Yamanashi", "Nagano", "Gifu", "Shizuoka", "Aichi", "Mie", "Shiga",
"Kyoto", "Osaka", "Hyogo", "Nara", "Wakayama", "Tottori", "Shimane",
"Okayama", "Hiroshima", "Yamaguchi", "Tokushima", "Kagawa", "Ehime",
"Kochi", "Fukuoka", "Saga", "Nagasaki", "Kumamoto", "Oita", "Miyazaki",
"Kagoshima", "Okinawa")
)
# show
DT::datatable(kadai01,
rownames = FALSE,
extensions = 'Buttons',
options = list(autoWidth = FALSE,
pageLength = 5,
dom = 'Bfrtip',
buttons = list("csv"),
scrollX = TRUE,
scrollCollapse = TRUE),
class = 'cell-border stripe',
caption = "source: ")| pref | pop | epc |
|---|---|---|
| Hokkaido | 5384 | 11070 |
| Aomori | 1309 | 2720 |
| Iwate | 1280 | 2769 |
| Miyagi | 2334 | 4819 |
| Akita | 1023 | 2177 |
| Yamagata | 1123 | 2385 |
## pref pop epc
## Length:47 Min. : 574 Min. : 1360
## Class :character 1st Qu.: 1114 1st Qu.: 2543
## Mode :character Median : 1649 Median : 3556
## Mean : 2704 Mean : 5678
## 3rd Qu.: 2728 3rd Qu.: 5838
## Max. :13514 Max. :28097
plot(kadai01[,2:3],
xlab = "Tthe Number of Population (thousands)",
ylab = "Electricity Consumption (million kWh")
abline(kadai.lm)head(kadai01_result[sort(kadai01_result$resid, decreasing = F, index = T)[[2]],], n = 10)
# or
kadai01_result %>%
arrange(desc(resid)) %>%
head(n = 10) %>%
knitr::kable()| pref | pop | epc | resid | |
|---|---|---|---|---|
| 13 | Tokyo | 13514 | 28097 | 1188.2946 |
| 34 | Hiroshima | 2845 | 6592 | 638.4044 |
| 17 | Ishikawa | 1154 | 3159 | 526.7176 |
| 16 | Toyama | 1067 | 2918 | 456.5953 |
| 40 | Fukuoka | 5103 | 10825 | 436.4397 |
| 33 | Okayama | 1922 | 4572 | 431.2796 |
| 28 | Hyogo | 5537 | 11626 | 385.0152 |
| 18 | Fukui | 787 | 2217 | 305.5466 |
| 20 | Nagano | 2100 | 4710 | 219.6677 |
| 26 | Kyoto | 2610 | 5703 | 210.9707 |
環境省が公表する「環境統計集(平成29年度版)」 のデータを用いて、グラフを1つ作成し、考察しなさい。枠内には、①使用データとそのURL、②プログラムソース、③グラフ、④考察の4点を記載しなさい。なお、データ整形は、必ずしもRを使用しなくても良い。必要があれば、データ整形に関する説明も、①使用データの部分に記載しなさい。
コード等をwordに貼って、ファイルを提出する
kadai.lm##
## Call:
## lm(formula = epc ~ pop, data = kadai01)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1318.15 -129.32 -35.57 174.31 1188.29
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 365.69731 79.91473 4.576 3.72e-05 ***
## pop 1.96411 0.02091 93.913 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 387 on 45 degrees of freedom
## Multiple R-squared: 0.9949, Adjusted R-squared: 0.9948
## F-statistic: 8820 on 1 and 45 DF, p-value: < 2.2e-16
残渣(residuals)は、実際の値と予測値の差分で計算される。
以下の図だと、予測値の青の線と実際の値である赤の線の間が残差である。
# load packages
library(modelr)
library(scales)
library(plotly)
# define the data for visualization
kadai01_plot <- kadai01 %>%
add_predictions(kadai.lm) %>%
add_residuals(kadai.lm) %>%
`names<-`(value = c("pref", "pop", "epc", "predicted_epc", "residuals")) %>%
pivot_longer(cols = c(-pref, -pop), names_to = "vars", values_to = "value")
# raw v.s. expected value
g1 <-
kadai01_plot %>%
filter(vars != "residuals") %>%
ggplot(aes(pop, value, color = vars, label = pref)) +
geom_point() +
geom_line() +
scale_x_continuous(labels = comma) +
scale_y_continuous(labels = comma) +
labs(title = "The scatter plot of the predicted value and raw data based on `kadai01`",
x = "# of population",
y = "Electricity",
color = "Name") +
theme_minimal()
ggplotly(g1)実際の値である人口の変化によって生じる残差の値を、グラフ化したのが以下の図になる。
# residual
library(ggrepel)
kadai01_plot %>%
filter(vars == "residuals") %>%
ggplot(aes(pop, value, label = pref)) +
geom_hline(yintercept = 0, colour = "black", linetype = "dashed") +
geom_point() +
geom_line() +
geom_label_repel(nudge_x = TRUE, nudge_y = TRUE, check_overlap = TRUE) +
labs(title = "The scatter plot of the residual",
x = "Pop",
y = "Residuals") +
theme_minimal()pacman::p_load("NipponMap", "sf", "ggthemes")
map <- read_sf(system.file("shapes/jpn.shp", package = "NipponMap")[1],
crs = "+proj=longlat +datum=WGS84")
map2 <-
kadai01_plot %>%
pivot_wider(id_cols = c(pref, pop), names_from = vars, values_from = value) %>%
rename(name = pref) %>%
left_join(map, by = "name") %>%
st_sf()
ggplot(map2, aes(fill = residuals)) +
geom_sf(size = .1) +
labs(title = "The Residuals Accross Japanese Prefectures") +
scale_fill_gradient2(
name = "Residuals",
labels = scales::number_format(big.mark = ","),
low = "#15C6E3",
mid = "white",
high = "#EB7411",
midpoint = 0) +
theme_pander()